package com.shujia.spark.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demonstrates the RDD `distinct` operator: builds an in-memory RDD of
  * integers containing duplicates, removes the duplicates and prints the
  * remaining values.
  */
object Demo17Distinct {

  /**
    * Entry point: runs the `distinct` demo against a local Spark master.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // NOTE(review): the app name "group" looks copied from another demo;
    // it is kept unchanged here because it is a runtime-visible string.
    val conf = new SparkConf()
      .setAppName("group")
      .setMaster("local")

    val sc = new SparkContext(conf)

    // Always stop the context, even if the job throws, so the resources held
    // by the SparkContext are released.
    try {
      val rdd1: RDD[Int] = sc.parallelize(List(1, 2, 3, 4, 5, 7, 87, 9, 4, 4, 3, 2, 4, 5, 6))

      /**
        * distinct: removes duplicate elements from the RDD; this operator
        * triggers a shuffle.
        */
      val distinctRDD: RDD[Int] = rdd1.distinct()

      // foreach is an action: it triggers the job and prints each distinct value.
      distinctRDD.foreach(println)
    } finally {
      sc.stop()
    }
  }

}
